### CODE FOR FIGURE 2 ###
library(tidyverse)
library(broom)
library(estimatr)
library(deming)
library(xtable)
library(rsample)
set.seed(78712)

# 1, 2, and 5 shared time
# 33, 35, 36 shared time
# 40 and 43 shared time
# These are blocked together for block-boostrapped standard errors

# function to extract ATEs
Get_ATE <- function(data, subsample = c(0,1)) {
      out <- tidy(lm_robust(formula(data$FORMULA[1]), data, 
                            subset = TNRFU %in% subsample))
      out %>% slice_tail() %>%
            mutate(StudyID = data$StudyId[1])
}

# set working directory
setwd("data/analysis/cleaned")

##### WHOLE SAMPLE #####
# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

dem_all <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                  data = dem_dat, jackknife = T)

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

all_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

all_se_int <- sd(all_se_ints)

# put bootstrapped intercepts and betas into a data frame
boots <- tibble(alpha = all_se_ints, beta = all_se_slopes)

## create confidence band
dem_dat <- dem_dat |> mutate(prediction = dem_all$coefficients["(Intercept)"] + estimate.e * dem_all$coefficients["estimate.e"],
                             ## create the upper bound
                             upper_band = prediction + 1.96 * sqrt(var(boots$alpha) + estimate.e^2 * var(boots$beta) - 2 * estimate.e * cov(boots$alpha, boots$beta)),
                             ## create the lower bound
                             lower_band = prediction - 1.96 * sqrt(var(boots$alpha) + estimate.e^2 * var(boots$beta) - 2 * estimate.e * cov(boots$alpha, boots$beta))
)

## Plot ##
ggplot(dem_dat, aes(x = estimate.e, y = estimate.r)) + 
      geom_point() + 
      geom_ribbon(aes(ymin = lower_band, ymax = upper_band), 
                  fill = "gray70", alpha = 0.5) +
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .25) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .25) + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2) + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2) + 
      geom_abline(slope = 1, intercept = 0, alpha = .5, linetype = 2) +
      geom_abline(intercept = dem_all$coefficients[1], slope = dem_all$coefficients[2], alpha = .8) +
      xlab("Standardized Effect among Eager Respondents") + 
      ylab("Standardized Effect among Reluctant Respondents") +
      coord_cartesian(xlim =c(-.92, .92), ylim = c(-.92, .92)) +
      theme_classic() +
      theme(legend.position = c(.85, .3),
            legend.title = element_text(size=14),
            legend.text = element_text(size=14),
            axis.text = element_text(size=14),
            axis.title = element_text(size=15),
            axis.ticks.length = unit(2, "mm")) 
ggsave(file = "../../../results/Figure_2.pdf")

#### TABLE J6 ####
write_file(dem_dat |> select(StudyID, estimate.e, std.error.e, df.e, estimate.r, std.error.r, df.r) |>
      mutate(df.e = as.integer(df.e), 
             df.r = as.integer(df.r),
             StudyID = as.integer(StudyID)) |>
      rename("Study No." = StudyID, "Eager Estimate" = estimate.e, "Eager SE" = std.error.e, "Eager N" = df.e,
             "Rel. Estimate" = estimate.r, "Rel. SE" = std.error.r, "Rel. N" = df.r) |>
      xtable(digits = 3, align = c("l", "l", "l", "l", "l", "l", "l", "l"),
             caption = "Eager and Reluctant ATEs, SEs, and Ns (Full Sample)") |>
      print.xtable(include.rownames = F), 
      file = "../../../results/Table_J6.tex")




